# "Efficiency and water use: Dynamic effects of irrigation technology adoption"
# by Micah Cameron-Harp and Nathan Hendricks
# 
# Code written by Micah Cameron-Harp
# May 16th, 2024
# 
# This r script performs the parallel trends test from Callaway and
#   Sant'Anna (2020) and exports the results for Table D.1 and D.2. 
pacman::p_load(
  did,
  tidyverse,
  openxlsx,
  dplyr)

# Parse arguments submitted by STATA
args = commandArgs(trailingOnly = "TRUE")
arg1 <- args[1]

#Define directories and set working directory
# NOTE - The root directory will be passed here from the main_text_results.do file 
dr_root <- paste0(arg1)
dr_data <- paste0(dr_root, "/data")
dr_output = paste0(dr_root, "/outputs")
dr_output_main = paste0(dr_root, "/outputs/main_text")
dr_output_app = paste0(dr_root, "/outputs/appendices")
dr_output_log = paste0(dr_root, "/outputs/logs")
dr_temp = paste0(dr_root, "/data/intermediate")
setwd(dr_temp)

#Make the order of dependant variables
effect_order <- c("af_used", "acres_irr", "depth_applied")

#Make an empty dataframe to store results
df_template <- data.frame(matrix(ncol=6,nrow=0, 
                                 dimnames=list(NULL, c("tech", "start_year", "end_year",
                                                       "af_used_pval", "acres_irr_pval", "depth_applied_pval"))))
#####################
#FLOOD TO CP/LEPA
  #Import data
  wrg_flood_cplepa <- read.csv(file = file.path(dr_temp, "flood_cporlepa_prepped.csv"))

  #Set value of tech
  tech_name <- "flood_to_cplepa"
  
  #Perform placebo test in a loop for a variety of time windows 
    #Make a copy of the df_template dataframe to store results
    flood_cplepa_pretrend_df <- df_template
    #Define the years we want to test
    start_years <- c(1991, 1996, 2001, 2006, 2011)
    end_years <- c(1995, 2000, 2005, 2010, 2015)
    #Outer loop through start years
    for (st_yr in start_years) {
      cur_start_year <- st_yr
      #Loop through end years
      for (end_yr in end_years) {
        #Only run if end year is after start year
        if (end_yr > st_yr) {
          cur_end_year <- end_yr
          wrg_flood_cplepa_input <- wrg_flood_cplepa %>% dplyr::filter(wua_year>= cur_start_year & wua_year<=cur_end_year)  
          #Make an empty vector for each dependant variable's pvalues
          pval_row <- data.frame(tech = tech_name, start_year = cur_start_year, end_year = cur_end_year)
          #Get the ATT_GTs
          for (dep_var in effect_order) {
            att_values <- att_gt(yname = dep_var,
                                  tname = "wua_year",
                                  idname = "WR_GROUP",
                                  gname = "cohort_flood_cplepa",
                                  data = wrg_flood_cplepa_input,
                                  panel = TRUE,
                                  allow_unbalanced_panel = TRUE,
                                  xformla=~hyd_cond+predev_sat+awc+sandtotal+silttotal+init_af_used,
                                  est_method = "dr",
                                  control_group = c("notyettreated"),
                                  anticipation=0,
                                  clustervars = "WR_GROUP", 
                                  biters=1000)
            #Store the p-value for the test that pre-trend coefficients are zero
            p_val <- c(att_values$Wpval)
            #Append it to the 
            pval_row <- cbind(pval_row, p_val)
            #make new variable name and rename
            pval_new_name <- paste0(dep_var, "_pval")
            pval_row <- pval_row %>% dplyr::rename(!!pval_new_name := p_val)
          }
        flood_cplepa_pretrend_df <- rbind(flood_cplepa_pretrend_df, pval_row)
        } #End the if statement
      } #End inner loop through end years
    }  #End outer loop through start years
    #Save it as csv which is used to make Table D.1 
    write.csv(flood_cplepa_pretrend_df, file = file.path(dr_output_app, "tableD1.csv"), row.names = FALSE)
    #Remove unnceccesary objects
    rm(wrg_flood_cplepa, wrg_flood_cplepa_input, flood_cplepa_pretrend_df)
    gc()
    
#################################################
#CP to LEPA
    wrg_cp_lepa <- read.csv(file = file.path(dr_temp, "cp_lepa_prepped.csv"))
    
    #Set value of tech
    tech_name <- "cp_to_lepa"
    
    #Perform placebo test in a loop for a variety of time windows 
    #Make a copy of the df_template dataframe to store results
    cp_lepa_pretrend_df <- df_template
    #Define the years we want to test
    start_years <- c(1991, 1996, 2001, 2006, 2011, 2016)
    end_years <- c(1995, 2000, 2005, 2010, 2015, 2019)
    #Outer loop through start years
    for (st_yr in start_years) {
      cur_start_year <- st_yr
      #Loop through end years
      for (end_yr in end_years) {
        #Only run if end year is after start year
        if (end_yr > st_yr) {
          cur_end_year <- end_yr
          wrg_cp_lepa_input <- wrg_cp_lepa %>% dplyr::filter(wua_year>= cur_start_year & wua_year<=cur_end_year)  
          #Make an empty vector for each dependant variable's pvalues
          pval_row <- data.frame(tech = tech_name, start_year = cur_start_year, end_year = cur_end_year)
          #Get the ATT_GTs
          for (dep_var in effect_order) {
            att_values <- att_gt(yname = dep_var,
                                 tname = "wua_year",
                                 idname = "WR_GROUP",
                                 gname = "cohort_cp_lepa",
                                 data = wrg_cp_lepa_input,
                                 panel = TRUE,
                                 allow_unbalanced_panel = TRUE,  
                                 # xformla = NULL,
                                 xformla=~hyd_cond+predev_sat+sandtotal+silttotal+init_af_used,
                                 est_method = "dr",
                                 control_group = c("notyettreated"),
                                 anticipation=0,
                                 clustervars = "WR_GROUP", 
                                 biters=1000)
            #Store the p-value for the test that pre-trend coefficients are zero
            p_val <- c(att_values$Wpval)
            #Append it to the 
            pval_row <- cbind(pval_row, p_val)
            #make new variable name and rename
            pval_new_name <- paste0(dep_var, "_pval")
            pval_row <- pval_row %>% dplyr::rename(!!pval_new_name := p_val)
          }
          cp_lepa_pretrend_df <- rbind(cp_lepa_pretrend_df, pval_row)
        } #End the if statement
      } #End inner loop through end years
    }  #End outer loop through start years
    #Save it as csv which is used to make Table D.2 
    write.csv(cp_lepa_pretrend_df, file = file.path(dr_output_app, "tableD2.csv"), row.names = FALSE)